import os
import gzip
import itertools
import time
import pickle

import pandas as pd
import numpy as np
import anndata as ad
import matplotlib.pyplot as plt
import seaborn as sns
from statannotations.Annotator import Annotator

from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import RobustScaler, StandardScaler, MinMaxScaler
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score, make_scorer, precision_score, recall_score

import scipy.cluster.hierarchy as sch
from scipy.stats import pearsonr, false_discovery_control, ttest_ind, mannwhitneyu, ttest_rel, wilcoxon, kruskal, brunnermunzel

from sknetwork.clustering import Louvain

import sys
sys.path.append('/mnt/d/housekeeping_genes')
import housekeepingMinerPy as hkg

# Load the AnnData object and keep only the observations whose `tcmr`
# label equals the string '1'.
adata_ml_corr = ad.read_h5ad('/mnt/d/housekeeping_genes/adata_corr_b.h5ad')
# Indexing an AnnData returns a view. Writing to `.obs` of a view makes
# anndata emit an ImplicitModificationWarning and copy behind the scenes,
# so materialise the subset explicitly before modifying it.
adata_ml_corr = adata_ml_corr[adata_ml_corr.obs.tcmr == '1', :].copy()
# Recode the class column as an integer-valued categorical.
adata_ml_corr.obs['boruta_groups_1_tcmr'] = (
    adata_ml_corr.obs['boruta_groups_1_tcmr'].astype(int).astype('category')
)

# Run the Boruta set selection on every non-skipped layer and record, per
# layer, how often each gene name occurs across the returned selections.
boruta_sets_ = dict()   # layer name -> {gene name: occurrence count}
aux_list = []           # raw return value of every selection call, in layer order

# Layers that are not submitted to the selection.
skipped_layers = {'trns_expr', 'log1p', 'TMM1p', 'count'}

t0 = time.time()

# Neither mask depends on the layer, so build them once rather than on
# every iteration.
keep_obs = adata_ml_corr.obs.outliers_hdbscan != -1          # rows whose `outliers_hdbscan` is not -1
keep_var = adata_ml_corr.var.equivalent == 'non_equivalent'  # columns flagged 'non_equivalent'

for layer in adata_ml_corr.layers:
    if layer in skipped_layers:
        continue

    # A fresh view is sliced per call so each call receives its own object.
    aux = hkg.mining.set_boruta_selection(adata_ml_corr[keep_obs, keep_var],
                                          layer=layer, class_col='boruta_groups_1_tcmr', n_set=3)
    aux_list.append(aux)

    # Each element of `aux` is indexed with 'genes'; pool those entries
    # across all elements returned for this layer.
    gn = list(itertools.chain.from_iterable(g['genes'] for g in aux))

    # np.unique is kept (rather than collections.Counter) so the stored keys
    # and counts retain the numpy types the pickled dictionary already uses.
    genes, counts = np.unique(gn, return_counts=True)
    if counts.size > 0:
        # Layers for which nothing was selected are left out of the result.
        boruta_sets_[layer] = dict(zip(genes, counts))

t3 = time.time()
print((t3-t0)/60)   # total elapsed time in minutes
print(boruta_sets_)

# Persist the per-layer gene-count dictionary as a pickle in the current
# working directory.
output_path = 'dict_boruta_groups_1_tcmr.pkl'
with open(output_path, 'wb') as pickle_file:
    pickle.dump(boruta_sets_, pickle_file)

# To reload the dictionary saved above:
# with open('dict_boruta_groups_1_tcmr.pkl', 'rb') as f:
#     loaded_dict = pickle.load(f)
